#Now we need to import our Data

Variables Names and Units

#let's first plot the data

hydrostation_bottle %>% 
 ggplot()+geom_point(aes(x=decy,y=`Sig-th`)) 

#Thi looks much better but still needs to interpretate

hydrostation_bottle %>%
  filter(`Sig-th`!=-999) %>%
  ggplot()+ geom_point(aes(x=decy,y=`Sig-th`))

#still nor super clear, lets try a line plot

hydrostation_bottle %>%
  filter(`Sig-th` !=-999 & Depth <20) %>%
  ggplot()+geom_line(aes(x=decy,y=`Sig-th`))

#clear seasonal signal for sigma theta, lets see how this compares to temp

hydrostation_bottle %>%
  filter(`Sig-th` !=-999 & Depth <20) %>%
  ggplot()+geom_point(aes(x=Temp,y=`Sig-th`))

#temperature and density are strongly correlates, but there appears to e 2 outlines that we will likely need to address at some point.
#we only have density data from 1988-present, but temp and salinity data from 1950s- present. 
#this means I can calculte seawater density from 1950s to present.  
#we can use the TEOs-10 to do this.

Teos-10 Toolbox in Package seacarb

?gsw #launches the documentation for the gibbs seawater toolbox (TEOS-10)
## starting httpd help server ... done
?gsw_sigma0
#it says we need absolute salinity and conservative temperature

#first we need absolute salinity 
?gsw_SA_from_SP
#practical salinity
#sea pressure (dbar)
#longitude 
#latitude

#let's plot our pressure data - it's missing before 1980's

hydrostation_bottle %>%
  ggplot()+geom_point(aes(x=decy,y=Pres))

#We have depth data for the time series
hydrostation_bottle %>%
  ggplot()+
  geom_point(aes(x=decy,y=Depth))

?gsw_p_from_z

#adds a pressure column from/to the depth and latN columns from hydrostation bottle

hydrostation_bottle= hydrostation_bottle %>%
  mutate(Pres_gsw=gsw_p_from_z(Depth*-1,latN))

#mutate() creates a new column that are functions of exisisting variables.It can also modify (if the name is the same as an existing column) and delete columns (by setting their value to NULL ).


hydrostation_bottle %>%
  ggplot()+
  geom_point(aes(x=Pres,y=Pres_gsw))
## Warning: Removed 3 rows containing missing values (`geom_point()`).

#checking lat long and salinity data

hydrostation_bottle %>%
  ggplot()+
  geom_point(aes(x=decy,y=latN))

hydrostation_bottle %>%
  ggplot()+
  geom_point(aes(x=decy,y=lonW))

hydrostation_bottle %>%
  ggplot()+
  geom_point(aes(x=decy,y=CTD_S))

hydrostation_bottle %>%
  ggplot()+
  geom_point(aes(x=decy,y=Sal1))

hydrostation_bottle= hydrostation_bottle %>%
  mutate(Pres_gsw=gsw_p_from_z(Depth*-1,latN)) %>%
  mutate(S_abs_gsw=gsw_SA_from_SP(Sal1,Pres_gsw,360-lonW,latN))

#check it
hydrostation_bottle %>%
  ggplot()+
  geom_point(aes(x=decy,y=S_abs_gsw))
## Warning: Removed 3 rows containing missing values (`geom_point()`).

#how else can I check my data?

hydrostation_bottle %>%
  filter(Sal1!=-999) %>%
  ggplot()+
  geom_point(aes(x=Sal1,y=S_abs_gsw))

#now we need to calculate the conservative temp

?gsw_CT_from_t

#We need absolute salinity, in-situ temp (ITS-90), and sea pressure

#add line to calculate conservative temp
HydroS= hydrostation_bottle %>%
 filter(Sal1!=-999) %>%
  mutate(Pres_gsw=gsw_p_from_z(Depth*-1,latN)) %>%
  mutate(S_abs_gsw=gsw_SA_from_SP(Sal1,Pres_gsw,360-lonW,latN)) %>%
  mutate(T_cons_gsw=gsw_CT_from_t(S_abs_gsw,Temp,Pres_gsw))

#let's check our data

HydroS %>%
  filter(Temp!=-999) %>%
  ggplot()+
  geom_point(aes(x=Temp,y=T_cons_gsw))

#add line to calculate conservative temperature

HydroS = hydrostation_bottle %>%  #replace the homeworkline here select? may be usefull or combination of filters.
  filter(Sal1!=-999) %>%
  filter(Temp!=-999) %>%
  mutate(Pres_gsw=gsw_p_from_z(Depth*-1,latN)) %>%
  mutate(S_abs_gsw=gsw_SA_from_SP(Sal1,Pres_gsw,360-lonW,latN)) %>%
  mutate(T_cons_gsw=gsw_CT_from_t(S_abs_gsw,Temp,Pres_gsw))
  
#add line to calculate conservative temperature

  HydroS = hydrostation_bottle %>%
  filter(Sal1!=-999) %>%
  filter(Temp!=-999) %>%
  mutate(Pres_gsw=gsw_p_from_z(Depth*-1,latN)) %>%
  mutate(S_abs_gsw=gsw_SA_from_SP(Sal1,Pres_gsw,360-lonW,latN)) %>%
  mutate(T_cons_gsw=gsw_CT_from_t(S_abs_gsw,Temp,Pres_gsw)) %>%
  mutate(Sig_th_gsw=gsw_sigma0(S_abs_gsw,T_cons_gsw))
  
  #our bottle and ctd salinity do not agree, this is likely the problem for the los-sig-th-gsw

  HydroS%>%
  filter(Sig_th_gsw<0) %>% 
    view()
  
  
  HydroS %>%
  filter(Sig_th_gsw<0) %>% 
  mutate(S_abs_gsw=gsw_SA_from_SP(CTD_S,Pres_gsw,360-lonW,latN)) %>%
  mutate(T_cons_gsw=gsw_CT_from_t(S_abs_gsw,Temp,Pres_gsw)) %>%
  mutate(Sig_th_gsw=gsw_sigma0(S_abs_gsw,T_cons_gsw)) 
## # A tibble: 1 × 19
##         Id yyyymmd  decy  time  latN  lonW Depth  Pres  Temp CTD_S  Sal1 Sig-t…¹
##      <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1   6.13e9  2.02e7 2019.  1730  32.2  64.5   3.5   3.6  28.9  36.6  1.15    23.3
## # … with 7 more variables: `O2(1)` <dbl>, OxFix <dbl>, Anom1 <dbl>,
## #   Pres_gsw <dbl>, S_abs_gsw <dbl>, T_cons_gsw <dbl>, Sig_th_gsw <dbl>, and
## #   abbreviated variable name ¹​`Sig-th`
  #view()
  
 HydroS_correctedS_a=HydroS %>%
  filter(Sig_th_gsw<0) %>% 
   mutate(S_abs_gsw=gsw_SA_from_SP(CTD_S,Pres_gsw,360-lonW,latN)) %>%
  mutate(T_cons_gsw=gsw_CT_from_t(S_abs_gsw,Temp,Pres_gsw)) %>%
  mutate(Sig_th_gsw=gsw_sigma0(S_abs_gsw,T_cons_gsw)) 

 HydroS_correctedS_b=HydroS %>%
    filter(Sig_th_gsw>0) 
 
 HydroS_corrected=rbind(HydroS_correctedS_a,HydroS_correctedS_b)
 
#row bind function represents a row bind function for vectors, data frame, and matrices to be arranged as rows.  It uses combine multiple data frames for data manipulation
 
 HydroS_corrected %>% 
   filter(`Sig-th`!=-999) %>% 
   ggplot()+geom_point(aes(x=`Sig-th`,y=Sig_th_gsw))

 HydroS_corrected %>% 
   ggplot()+geom_point(aes(x=Sig_th_gsw,y=Depth))+scale_y_reverse()+scale_x_continuous(position="top")+xlab("Depth (m)")+theme_classic()

Has surface sigma theta decreased over time?

HydroS_shallow = HydroS_corrected %>% 
  filter(Depth<30)

?lm

lm(Sig_th_gsw~decy,data = HydroS_shallow)
## 
## Call:
## lm(formula = Sig_th_gsw ~ decy, data = HydroS_shallow)
## 
## Coefficients:
## (Intercept)         decy  
##   33.404723    -0.004238
#coefficients (intercept and decy)

#y=mx+b

#y = sig_th_gsw

#x = decy

#coefficients: intercept = b, decy = m

#Sig_th_gsw = -0.004*decy + 33.4

#(kg/m^3/y)*y + (kg/m^3)

sig_theta_time_model=lm(Sig_th_gsw~decy, data=HydroS_shallow) 

summary(sig_theta_time_model)
## 
## Call:
## lm(formula = Sig_th_gsw ~ decy, data = HydroS_shallow)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5789 -0.8683  0.1070  0.8819  1.5805 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 33.4047230  1.6704055  19.998  < 2e-16 ***
## decy        -0.0042378  0.0008387  -5.053 4.59e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9401 on 3410 degrees of freedom
## Multiple R-squared:  0.007431,   Adjusted R-squared:  0.00714 
## F-statistic: 25.53 on 1 and 3410 DF,  p-value: 4.587e-07
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
plot=HydroS_shallow %>% 
  ggplot(aes(x=decy,y=Sig_th_gsw))+geom_point()+geom_line()+geom_smooth(method="lm")+ theme_classic()

ggplotly(plot)
## `geom_smooth()` using formula = 'y ~ x'